#! /usr/bin/env python
#coding=utf-8

from sgmllib import SGMLParser
import re

class HrefDataCatcher(SGMLParser):

    def reset(self):
        SGMLParser.reset(self)
        self.data = []
        self.count = 0

    def process(self, text):
        if re.match(ur'第\D{1,16}章', text):
            return text
        return ''

    def start_a(self, attrs):
        self.count += 1

    def end_a(self):
        self.count -= 1

    def handle_data(self, text):
        if self.count:
            m = self.process(text)
            if m:
                self.data.append(m)

if __name__ == "__main__":
    import urllib
    usock = urllib.urlopen("http://book.zongheng.com/showchapter/72267.html")
    parser = HrefDataCatcher()
    parser.feed(usock.read().decode('gbk'))
    parser.close()
    usock.close()
    for row in parser.data:
        print row